In [6]:
from __future__ import division, print_function, unicode_literals
import os
import numpy as np
np.random.seed(42)
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
# Font sizes for axis labels and tick labels on every plot.
plt.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

# Figures are written to <PROJECT_ROOT_DIR>/images/<PROGRAM>.
PROJECT_ROOT_DIR = "."
PROGRAM = "california_state_data"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", PROGRAM)
def save_fig(fig_id, tight_layout=True, fig_extension='png', resolution=300):
    """Save the current matplotlib figure under IMAGES_PATH.

    The file is written to ``IMAGES_PATH/<fig_id>.<fig_extension>``. The
    target directory is created first if it does not exist yet.

    Args:
        fig_id: Base file name (without extension) for the figure.
        tight_layout: If true, call ``plt.tight_layout()`` before saving.
        fig_extension: File extension; also passed to ``savefig`` as ``format``.
        resolution: Passed to ``savefig`` as ``dpi``.
    """
    # savefig does not create missing directories, so make sure the
    # output folder exists before writing into it.
    if not os.path.isdir(IMAGES_PATH):
        os.makedirs(IMAGES_PATH)
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)
In [7]:
import os
import tarfile
from six.moves import urllib
# Default local directory for the dataset files.
HOUSING_PATH = os.path.join("datasets", "housing")
# Remote base URL and the full URL of the housing archive.
DOWNLOAD_ROOT = "https://raw.githubusercontent.com/ageron/handson-ml/master/"
HOUSING_URL = DOWNLOAD_ROOT + "datasets/housing/housing.tgz"
def fetch_housing_data(housing_url=HOUSING_URL, housing_path=HOUSING_PATH):
    """Download the housing archive and extract it into ``housing_path``.

    Args:
        housing_url: URL of the ``.tgz`` archive to download.
        housing_path: Directory that receives ``housing.tgz`` and its
            extracted contents; created if it does not exist.
    """
    if not os.path.isdir(housing_path):
        os.makedirs(housing_path)
    tgz_path = os.path.join(housing_path, "housing.tgz")
    urllib.request.urlretrieve(housing_url, tgz_path)
    # The context manager closes the archive even when extraction fails.
    with tarfile.open(tgz_path) as housing_tgz:
        # NOTE(review): extractall() trusts the member paths stored in the
        # archive; only point housing_url at a trusted source.
        housing_tgz.extractall(path=housing_path)
In [8]:
# Download and extract the housing dataset using the default URL and target directory.
fetch_housing_data()
In [9]:
import pandas as pd
def load_housing_data(housing_path=HOUSING_PATH):
    """Read ``housing.csv`` from ``housing_path`` and return the parsed result.

    Args:
        housing_path: Directory containing ``housing.csv``.

    Returns:
        Whatever ``pd.read_csv`` returns for that file (a DataFrame).
    """
    return pd.read_csv(os.path.join(housing_path, "housing.csv"))
In [11]:
# Load the CSV into a DataFrame and preview the first rows.
housing = load_housing_data()
housing.head()
Out[11]:
In [12]:
# Print a per-column summary of the DataFrame (dtypes and non-null counts).
housing.info()
In [13]:
# Count how often each distinct value of ocean_proximity occurs.
housing["ocean_proximity"].value_counts()
Out[13]:
In [14]:
# Summary statistics for the numeric columns.
housing.describe()
Out[14]:
In [17]:
%matplotlib inline
import matplotlib.pyplot as plt
# Draw 50-bin histograms of the DataFrame's columns, save the figure via save_fig, then display it.
housing.hist(bins=50, figsize=(20,15))
save_fig("attribute_histogram_plots")
plt.show()
In [ ]: